First we define the variables which will be present in the dataset
# Experimental design variables: every combination of device, size,
# repetition, message type and app yields one trial in the dataset.
devices <- c("Samsung", "Nokia")
n_devices <- length(devices)
sizes <- c("small", "medium", "large")
n_sizes <- length(sizes)
n_repetitions <- 10
repetitions <- 0:(n_repetitions - 1)
message_types <- c("text", "image", "video", "audio", "document")
n_message_types <- length(message_types)
apps <- c("Telegram", "WhatsApp", "Messenger")
n_apps <- length(apps)
# Total number of trials in the full factorial design
n <- n_devices * n_sizes * n_repetitions * n_message_types * n_apps
Then we load the dataset from a csv file, clean it and separate the network data (which is aggregated over message types) from the other data which is not aggregated.
# Determine which columns to select
col_selection = c("device", "app_name", "size_name", "repetition", "message_type", "time_sec", "cpu", "memory", "energy_simple_J", "tcp_no_packets", "tcp_size_packets_MB", "udp_no_packets", "udp_size_packets_MB")
# Load data
# NOTE(review): `data_path` must be defined earlier in the document — confirm.
data = read.table(data_path, sep=",", header=TRUE)
# Select only 900 rows and wanted columns
# 900 = 2 devices x 3 apps x 3 sizes x 5 message types x 10 repetitions
data = data[1:900, col_selection]
# Rename several columns
colnames(data) = c("device", "app", "size", "repetition", "message_type", "time_sec", "cpu", "memory", "energy", "tcp_no", "tcp_size", "udp_no", "udp_size")
# Make energy values positive
# (energy is stored as a negative value in the raw data — presumably battery
# drain; verify against the measurement tool's documentation)
data$energy = -1 * data$energy
# Reorder levels
# Impose the experiment's logical ordering instead of the default
# alphabetical ordering, so plots and tables come out in this order.
data$size = factor(data$size, levels=sizes)
data$app = factor(data$app, levels=apps)
data$message_type = factor(data$message_type, levels=message_types)
# Split into regular and network data
# network_data starts as a full copy; both subsets are column-filtered below.
network_data = data
# Clean regular data
regular_col_selection = c("device", "app", "size", "repetition", "message_type", "time_sec", "cpu", "memory", "energy")
data = data[, regular_col_selection]
# Clean network data
# Network metrics are aggregated over message types, so message_type is dropped.
network_col_selection = c("device", "app", "size", "repetition", "time_sec", "energy", "tcp_no", "tcp_size", "udp_no", "udp_size")
network_data = network_data[, network_col_selection]
# Aggregate time and energy over the five message types: for every
# device/app/size/repetition combination, write the summed time_sec and
# energy into the first row of that slice (addressed by its row name).
for(device in devices) {
for(app in apps) {
for(size in sizes) {
for(repetition in repetitions) {
# All rows (one per message type) belonging to this combination
slice = network_data[network_data$device==device & network_data$app==app & network_data$size==size & network_data$repetition==repetition,]
row_name = rownames(slice)[1]
network_data[row_name,"time_sec"] = sum(slice$time_sec)
network_data[row_name,"energy"] = sum(slice$energy)
}
}
}
}
# Remove NA values
# Drops the rows whose network columns are NA, leaving one aggregated row per
# device/app/size/repetition combination — presumably the network metrics are
# recorded only once per combination; verify against the raw data.
network_data = na.omit(network_data)
Then we begin exploring the data
We start by exploring the different metrics: the dependent variable (energy consumption) as well as the support metrics (Memory, CPU, and TCP & UDP traffic)
# Metric definitions ----
# Non-aggregated metrics: the dependent variable (energy) plus CPU and memory,
# with matching plot titles and axis labels (parallel vectors by position).
metrics <- c("energy", "cpu", "memory")
metric_titles <- c("Energy", "CPU", "Memory")
metric_labels <- c("Energy (Joule)", "CPU utilization (%)", "Memory usage (bytes)")
# Aggregated (network) metrics: TCP/UDP packet counts and sizes.
network_metrics <- c("tcp_no", "tcp_size", "udp_no", "udp_size")
network_metric_titles <- c("TCP #packets", "TCP size", "UDP #packets", "UDP size")
network_metric_labels <- c("TCP (number of packets)", "TCP size (MB)", "UDP (number of packets)", "UDP size (MB)")
First we calculate descriptive statistics for the non-aggregated metrics
# Descriptive statistics (min/median/mean/sd/max) for each non-aggregated
# metric, reshaped to one row per metric.
metric_summary = data %>%
summarise(across(all_of(metrics), .fns =
list(min = min,
median = median,
mean = mean,
sd = sd,
max = max))) %>%
# Column names are "<metric>_<statistic>"; split on "_" so the statistic
# becomes its own column (".value") and the metric a "variable" column.
pivot_longer(everything(), names_sep='_', names_to=c('variable', '.value'))
metric_summary
## # A tibble: 3 × 6
## variable min median mean sd max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 energy 4.43 104. 109. 33.9 264.
## 2 cpu 1.6 14.4 15.0 5.62 60.8
## 3 memory 78327. 151277. 150540. 49075. 262608.
Then we calculate descriptive statistics for the aggregated metrics (network metrics)
# Descriptive statistics for the network metrics. A names_pattern is used
# instead of names_sep because the metric names themselves contain "_"
# (e.g. "tcp_no"), so splitting on "_" alone would be ambiguous.
network_summary = network_data %>%
summarise(across(all_of(network_metrics), .fns =
list(min = min,
median = median,
mean = mean,
sd = sd,
max = max))) %>%
pivot_longer(everything(), names_pattern="(tcp_no|tcp_size|udp_no|udp_size)_(.*)", names_to=c('variable', '.value'))
network_summary
## # A tibble: 4 × 6
## variable min median mean sd max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 tcp_no 151 3728. 4978. 2692. 16959
## 2 tcp_size 0.021 1 1.54 1.48 13
## 3 udp_no 2 169 295. 737. 9401
## 4 udp_size 0 0.074 0.173 0.692 9
For each metric we will:
(i) create a density plot
(ii) create a qq-plot
(iii) perform the Shapiro-Wilk test
To inspect the distribution of each metric and assess normality
# Density plot of one metric with dashed mean (red) / median (blue) lines
# and dotted red lines at mean +/- 1 sd.
#
# df:     data frame containing the metric column
# metric: column name as a string
# title:  plot title
# label:  x-axis label
plot_density = function(df, metric, title, label) {
  # Rounded mean and sd for the +/- 1 sd guide lines. Distinct local names
  # avoid masking base mean()/sd(); the previously computed `median` local
  # was never used (stat_central_tendency computes its own), so it is removed.
  metric_mean = mean(df[[metric]]) %>% round(digits=2)
  metric_sd = sd(df[[metric]]) %>% round(digits=2)
  # Create a density plot
  return(ggplot(df, aes(x=.data[[metric]])) +
    geom_density() +
    theme_pubr() +
    stat_central_tendency(type = "mean", color = "red", linetype = "dashed") +
    stat_central_tendency(type = "median", color = "blue", linetype = "dashed") +
    geom_vline(xintercept=c(metric_mean+metric_sd, metric_mean-metric_sd), color = "red", linetype="dotted") +
    labs(title=title,
         x=label) +
    theme(plot.title = element_text(hjust = 0.5)))
}
# Assess normality of one metric by (i) a density plot, (ii) a QQ-plot and
# (iii) a Shapiro-Wilk test. Each plot is saved to disk and drawn again so
# it also appears in the rendered document.
assess_normality = function(df, metric, title, label) {
# Create a density plot (displayed, then last plot saved via ggsave)
print(plot_density(df, metric, title, label))
ggsave(sprintf("%snormality/metrics/%s_density_plot.png", exploratory_figures_path, metric))
# Create a QQ-plot — written to a PNG device first, then re-drawn for the report
png(sprintf("%snormality/metrics/%s_qq_plot.png", exploratory_figures_path, metric))
qqPlot(df[[metric]], ylab=label, main=paste("QQ-plot of", title))
dev.off()
qqPlot(df[[metric]], ylab=label, main=paste("QQ-plot of", title))
# Perform Shapiro-Wilk test and print the result alongside the metric title
shapiro_test = shapiro.test(df[[metric]])
print(title)
print(shapiro_test)
}
# Run assess_normality() for every metric/title/label triple.
assess_normalities = function(df, metrics, titles, labels) {
  # Scalar && short-circuits and is the correct operator for an if-like check
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) && length(metrics) == length(labels))
  # seq_along() is safe for empty input (1:length(x) would yield c(1, 0))
  for(i in seq_along(metrics)) {
    assess_normality(df, metrics[i], titles[i], labels[i])
  }
}
# Assess normality for Energy, CPU and Memory
# (density plot + QQ-plot + Shapiro-Wilk test per metric; plots saved to disk)
assess_normalities(data, metrics, metric_titles, metric_labels)
## Saving 7 x 5 in image
## [1] "Energy"
##
## Shapiro-Wilk normality test
##
## data: df[[metric]]
## W = 0.97065, p-value = 1.712e-12
## Saving 7 x 5 in image
## [1] "CPU"
##
## Shapiro-Wilk normality test
##
## data: df[[metric]]
## W = 0.90509, p-value < 2.2e-16
## Saving 7 x 5 in image
## [1] "Memory"
##
## Shapiro-Wilk normality test
##
## data: df[[metric]]
## W = 0.90574, p-value < 2.2e-16
# Assess normality for Network metrics
# (same procedure, on the aggregated network dataset)
assess_normalities(network_data, network_metrics, network_metric_titles, network_metric_labels)
## Saving 7 x 5 in image
## [1] "TCP #packets"
##
## Shapiro-Wilk normality test
##
## data: df[[metric]]
## W = 0.81779, p-value = 1e-13
## Saving 7 x 5 in image
## [1] "TCP size"
##
## Shapiro-Wilk normality test
##
## data: df[[metric]]
## W = 0.5628, p-value < 2.2e-16
## Saving 7 x 5 in image
## [1] "UDP #packets"
##
## Shapiro-Wilk normality test
##
## data: df[[metric]]
## W = 0.26134, p-value < 2.2e-16
## Saving 7 x 5 in image
## [1] "UDP size"
##
## Shapiro-Wilk normality test
##
## data: df[[metric]]
## W = 0.16913, p-value < 2.2e-16
If p<0.05 we reject the null hypothesis that the metric is
normally distributed.
Otherwise we retain the assumption of normality.
To see if the data shows great difference between devices we make density plots for all metrics per device
# Overlaid density plot of one metric, split (fill colour) by device.
#
# df:     data frame with the metric and a `device` column
# metric: column name as a string; title/label: plot title fragment and x label
plot_density_per_device = function(df, metric, title, label) {
  p = ggplot(df, aes(x=.data[[metric]], fill=device)) +
    geom_density(alpha=0.4) +
    theme_pubr() +
    labs(title=sprintf("Density plot of %s per Device", title),
         x=label) +
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5))
  return(p)
}
# Display and save a per-device density plot for every metric.
plot_densities_per_device = function(df, metrics, titles, labels) {
  # Scalar && is the right operator for this single-valued condition
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) && length(metrics) == length(labels))
  # seq_along() is safe for empty input (1:length(x) would yield c(1, 0))
  for(i in seq_along(metrics)) {
    print(plot_density_per_device(df, metrics[i], titles[i], labels[i]))
    ggsave(sprintf("%smetrics_per_device/%s_density_plot_per_device.png", exploratory_figures_path, metrics[i]))
  }
}
# Energy, CPU and Memory
# Per-device density plots for the non-aggregated metrics
plot_densities_per_device(data, metrics, metric_titles, metric_labels)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
# Network metrics
# Per-device density plots for the aggregated network metrics
plot_densities_per_device(network_data, network_metrics, network_metric_titles, network_metric_labels)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
As the memory data is clearly multimodal, even when split on device, we make density plots for our different independent variables to identify other causes.
# Violin + box plot of memory per message type, faceted by device and app.
# Fixed deprecated arguments: show_guide -> show.legend, fun.y -> fun
# (see the ggplot2 deprecation warnings this chunk previously emitted).
ggplot(data, aes(x=message_type, y=memory, group=message_type)) +
facet_grid(device ~ app) +
geom_violin(trim = FALSE, alpha = 0.5, show.legend = FALSE, aes(color=message_type)) +
geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
# Diamond (shape 5) marks each group mean
stat_summary(fun=mean, colour="black", geom="point",
shape=5, size=1, show.legend = FALSE) +
labs(title="Violin plot of Memory per group",
x="Message type",
y="Memory") +
theme(legend.title=element_blank(),
plot.title = element_text(hjust = 0.5),
axis.text=element_text(size=8))
## Warning: The `show_guide` argument of `layer()` is deprecated as of ggplot2 2.0.0.
## ℹ Please use the `show.legend` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Save the per-group memory violin/box plot shown above
ggsave(sprintf("%smemory/memory_violin_box_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image
We see that memory differs a lot for Messenger, and for the Samsung device differences between Telegram and WhatsApp are also large.
# Violin + box plot of CPU per message type, faceted by device and app.
# Fixed deprecated arguments: show_guide -> show.legend, fun.y -> fun.
ggplot(data, aes(x=message_type, y=cpu, group=message_type)) +
facet_grid(device ~ app) +
geom_violin(trim = FALSE, alpha = 0.5, show.legend = FALSE, aes(color=message_type)) +
geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
# Diamond (shape 5) marks each group mean
stat_summary(fun=mean, colour="black", geom="point",
shape=5, size=1, show.legend = FALSE) +
labs(title="Violin plot of CPU per group",
x="Message type",
y="CPU") +
theme(legend.title=element_blank(),
plot.title = element_text(hjust = 0.5),
axis.text=element_text(size=8))
ggsave(sprintf("%scpu/cpu_violin_box_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image
TCP is also clearly multi-modal and therefore we further inspect network metrics as well and make density plots for our different independent variables.
# Violin + box plot of one network metric per size, faceted by device and app.
# Fixed deprecated arguments: show_guide -> show.legend, fun.y -> fun.
#
# df:     data frame with the metric plus size, device and app columns
# metric: column name as a string; title/label: plot title fragment and y label
plot_violin_box_network_metric_facet = function(df, metric, title, label) {
  return(ggplot(df, aes(x=size, y=.data[[metric]], group=size)) +
    facet_grid(device ~ app) +
    geom_violin(trim = FALSE, alpha = 0.5, show.legend = FALSE, aes(color=size)) +
    geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
    # Diamond (shape 5) marks each group mean
    stat_summary(fun=mean, colour="black", geom="point",
                 shape=5, size=1, show.legend = FALSE) +
    labs(title=sprintf("Violin plot of %s per group", title),
         y=label) +
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5)))
}
# Display and save a faceted violin/box plot for every network metric.
plot_violin_box_network_metrics_facet = function(df, metrics, titles, labels) {
  # Scalar && is the right operator for this single-valued condition
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) && length(metrics) == length(labels))
  # seq_along() is safe for empty input (1:length(x) would yield c(1, 0))
  for(i in seq_along(metrics)) {
    print(plot_violin_box_network_metric_facet(df, metrics[i], titles[i], labels[i]))
    ggsave(sprintf("%snetwork/per_group/%s_violin_box_plot_per_group.png", exploratory_figures_path, metrics[i]))
  }
}
# One faceted violin/box plot per network metric, displayed and saved
plot_violin_box_network_metrics_facet(network_data, network_metrics, network_metric_titles, network_metric_labels)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
Again we see a similar pattern with Messenger being very different from
Telegram and WhatsApp for TCP. For UDP we do not see such a difference.
Furthermore, for all network measures we spot several extreme
values.
# Descriptive statistics (min/median/mean/sd/max) of one network metric,
# grouped by app and size. Reads the global `network_data`.
descriptives_network_metric = function(mt) {
  summary_fns = list(min = min,
                     median = median,
                     mean = mean,
                     sd = sd,
                     max = max)
  result = network_data %>%
    group_by(app, size) %>%
    summarise(across(all_of(mt), .fns = summary_fns))
  return(result)
}
# Print the grouped descriptive table for each network metric in turn.
descriptives_network_metrics = function(metrics) {
  for (mt in metrics) {
    descriptives_network_metric(mt) %>% print()
  }
}
# Print grouped descriptive tables for all four network metrics
descriptives_network_metrics(network_metrics)
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 7
## # Groups: app [3]
## app size tcp_no_min tcp_no_median tcp_no_mean tcp_no_sd tcp_no_max
## <fct> <fct> <int> <dbl> <dbl> <dbl> <int>
## 1 Telegram small 2872 3664. 3735. 725. 6462
## 2 Telegram medium 2730 3668. 3978. 1725. 11215
## 3 Telegram large 3504 3762 4244. 2124. 13247
## 4 WhatsApp small 151 2854 2735. 994. 5136
## 5 WhatsApp medium 2745 2899 3238. 1410. 9204
## 6 WhatsApp large 2734 2885 3601. 3146. 16959
## 7 Messenger small 1536 7764. 7589. 1823. 10167
## 8 Messenger medium 6271 7868. 8039. 2155. 16322
## 9 Messenger large 5990 7522 7641. 1083. 8959
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 7
## # Groups: app [3]
## app size tcp_size_min tcp_size_median tcp_size_mean tcp_size_sd
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Telegram small 0.725 0.938 1.02 0.472
## 2 Telegram medium 0.801 0.972 1.26 1.35
## 3 Telegram large 0.947 1 1.35 1.57
## 4 WhatsApp small 0.021 0.67 0.680 0.367
## 5 WhatsApp medium 0.674 0.689 0.972 1.19
## 6 WhatsApp large 0.708 0.735 1.36 2.74
## 7 Messenger small 0.747 2 2.24 0.582
## 8 Messenger medium 2 2 2.6 1.57
## 9 Messenger large 2 2 2.35 0.489
## # ℹ 1 more variable: tcp_size_max <dbl>
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 7
## # Groups: app [3]
## app size udp_no_min udp_no_median udp_no_mean udp_no_sd udp_no_max
## <fct> <fct> <int> <dbl> <dbl> <dbl> <int>
## 1 Telegram small 2 65 86.4 73.3 255
## 2 Telegram medium 6 88.5 127. 107. 381
## 3 Telegram large 6 140 179 153. 561
## 4 WhatsApp small 8 62.5 129. 154. 547
## 5 WhatsApp medium 8 40 95.2 108. 341
## 6 WhatsApp large 8 59.5 109. 111. 380
## 7 Messenger small 134 352. 434. 285. 1442
## 8 Messenger medium 227 426 1027. 2027. 9401
## 9 Messenger large 256 463 466. 175. 792
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 7
## # Groups: app [3]
## app size udp_size_min udp_size_median udp_size_mean udp_size_sd
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Telegram small 0 0.019 0.0356 0.0419
## 2 Telegram medium 0.001 0.0395 0.0587 0.0605
## 3 Telegram large 0.001 0.0795 0.0998 0.106
## 4 WhatsApp small 0.001 0.0155 0.0564 0.0808
## 5 WhatsApp medium 0.001 0.004 0.0418 0.0648
## 6 WhatsApp large 0.001 0.0125 0.0444 0.0631
## 7 Messenger small 0.059 0.138 0.214 0.211
## 8 Messenger medium 0.062 0.211 0.769 1.99
## 9 Messenger large 0.063 0.217 0.240 0.111
## # ℹ 1 more variable: udp_size_max <dbl>
Due to outliers, tables with descriptive statistics are more informative.
# Violin + box plot of one network metric per app (aggregated, not faceted).
# Fixed deprecated arguments: show_guide -> show.legend, fun.y -> fun.
#
# df:     data frame with the metric and an `app` column
# metric: column name as a string; title/label: plot title fragment and y label
plot_violin_box_network_metric_per_app = function(df, metric, title, label) {
  return(ggplot(df, aes(x=app, y=.data[[metric]], group=app)) +
    geom_violin(trim = FALSE, alpha = 0.5, show.legend = FALSE, aes(color=app)) +
    geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
    # Diamond (shape 5) marks each per-app mean
    stat_summary(fun=mean, colour="black", geom="point",
                 shape=5, size=1, show.legend = FALSE) +
    labs(title=sprintf("Violin plot of %s per app", title),
         y=label) +
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5)))
}
# Display and save a per-app violin/box plot for every network metric.
plot_violin_box_network_metrics_per_app = function(df, metrics, titles, labels) {
  # Scalar && is the right operator for this single-valued condition
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) && length(metrics) == length(labels))
  # seq_along() is safe for empty input (1:length(x) would yield c(1, 0))
  for(i in seq_along(metrics)) {
    print(plot_violin_box_network_metric_per_app(df, metrics[i], titles[i], labels[i]))
    ggsave(sprintf("%snetwork/per_app/%s_violin_box_plot_per_app.png", exploratory_figures_path, metrics[i]))
  }
}
# One per-app violin/box plot per network metric, displayed and saved
plot_violin_box_network_metrics_per_app(network_data, network_metrics, network_metric_titles, network_metric_labels)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
The pattern clearly returns on the aggregated level
To be able to explain the energy consumption, we explore its
relationship with the support metrics.
We will use the pearson correlation if the support metric and energy are
both normally distributed. We will use the non-parametric spearman
correlation otherwise.
# Scatter plot of two metrics with a linear fit and the chosen correlation
# coefficient annotated (via ggpubr::stat_cor).
#
# df:                  data frame containing both metric columns
# xMetric/yMetric:     column names as strings
# xTitle/yTitle:       names used in the plot title
# xLabel/yLabel:       axis labels
# correlation:         correlation method passed to stat_cor (e.g. "spearman")
plot_scatter = function(df, xMetric, yMetric, xTitle, yTitle, xLabel, yLabel, correlation) {
  p = ggplot(df, aes(x=.data[[xMetric]], y=.data[[yMetric]])) +
    geom_point() +
    geom_smooth(method='lm', formula=y ~ x) +
    stat_cor(method=correlation) +
    theme_pubr() +
    labs(title=sprintf("Scatterplot of %s and %s", xTitle, yTitle),
         x=xLabel,
         y=yLabel) +
    theme(plot.title = element_text(hjust = 0.5))
  return(p)
}
# Scatter metrics[1] (x) against each of metrics[2..n] (y), saving each plot.
plot_scatters = function(df, metrics, titles, labels, correlations) {
  # Assert message now also mentions correlations, matching the condition
  assert("metrics, titles, labels and correlations are of equal length", length(metrics) == length(titles) && length(metrics) == length(labels) && length(metrics) == length(correlations))
  # seq_along(...)[-1] skips the x metric and is safe when only one metric
  # is supplied (2:length(metrics) would then loop over c(2, 1)).
  for(i in seq_along(metrics)[-1]) {
    print(plot_scatter(df, metrics[1], metrics[i], titles[1], titles[i], labels[1], labels[i], correlations[i]))
    ggsave(sprintf("%smetric_correlations/energy/%s_%s_scatter_plot.png", exploratory_figures_path, metrics[1], metrics[i]))
  }
}
# Scatter plots with CPU and Memory
# Spearman throughout: energy (the x metric) is non-normally distributed.
correlations = c("spearman", "spearman", "spearman")
plot_scatters(data, metrics, metric_titles, metric_labels, correlations)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
# Scatter plots with Network metrics
# Energy (x) against each network metric; Spearman since none are normal.
network_correlations = c("spearman", "spearman", "spearman", "spearman", "spearman")
plot_scatters(network_data, c("energy", network_metrics), c("Energy", network_metric_titles), c("Energy (Joule)", network_metric_labels), network_correlations)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
As earlier we found that energy is non-normally distributed, we have
only used Spearman’s correlation.
Energy and CPU show a clear relation, whereas the other support metrics
do not.
Because the duration of the trials varies so much, we decided to see how that affects our metrics
# Relationship between energy and trial duration (Spearman, since energy
# is non-normally distributed); plot displayed then saved.
plot_scatter(data, metrics[1], "time_sec", metric_titles[1], "Duration", metric_labels[1], "Trial duration (seconds)", "spearman")
ggsave(sprintf("%smetric_correlations/duration/%s_duration_scatter_plot.png", exploratory_figures_path, metrics[1]))
## Saving 7 x 5 in image
Again we used Spearman's correlation, among other reasons because of the
non-normality of energy consumption.
We see a very clear correlation between energy and duration which should
be taken into account when interpreting the results.
# Scatter plot of two metrics with points coloured by app, a linear fit
# (drawn in black) and the chosen correlation coefficient annotated.
#
# Same parameters as plot_scatter(); df must additionally have an `app` column.
plot_scatter_duration = function(df, xMetric, yMetric, xTitle, yTitle, xLabel, yLabel, correlation) {
  p = ggplot(df, aes(x=.data[[xMetric]], y=.data[[yMetric]])) +
    geom_point(aes(color=app)) +
    geom_smooth(method='lm', formula=y ~ x, color="black") +
    stat_cor(method=correlation) +
    theme_pubr() +
    labs(title=sprintf("Scatterplot of %s and %s", xTitle, yTitle),
         x=xLabel,
         y=yLabel) +
    theme(plot.title = element_text(hjust = 0.5))
  return(p)
}
# Scatter metrics[1] (duration) against each of metrics[2..n], saving each plot.
plot_scatters_duration = function(df, metrics, titles, labels, correlations) {
  # Assert message now also mentions correlations, matching the condition
  assert("metrics, titles, labels and correlations are of equal length", length(metrics) == length(titles) && length(metrics) == length(labels) && length(metrics) == length(correlations))
  # seq_along(...)[-1] skips the x metric and is safe when only one metric
  # is supplied (2:length(metrics) would then loop over c(2, 1)).
  for(i in seq_along(metrics)[-1]) {
    print(plot_scatter_duration(df, metrics[1], metrics[i], titles[1], titles[i], labels[1], labels[i], correlations[i]))
    ggsave(sprintf("%smetric_correlations/duration/duration_%s_scatter_plot.png", exploratory_figures_path, metrics[i]))
  }
}
# Scatter plots with Network metrics
# Duration (time_sec) against each network metric, points coloured by app
network_correlations = c("spearman", "spearman", "spearman", "spearman", "spearman")
plot_scatters_duration(network_data, c("time_sec", network_metrics), c("Duration", network_metric_titles), c("Trial duration (seconds)", network_metric_labels), network_correlations)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
Because the TCP data showed clear differences between apps, we color the
datapoints based on app. With this coloring we see no clear correlation
emerging between duration and the network metrics.
To explore our dependent variable further we look how energy is distributed for our different independent variables
# Violin + box plot of energy per level of `group` (e.g. app, size).
# Bug fix: previously plotted the global `data` regardless of the `df`
# argument. Also fixes deprecated show_guide -> show.legend.
plot_violin_box = function (df, group, title, label) {
  return(ggplot(df, aes(x=.data[[group]], y=energy, fill=.data[[group]])) +
    geom_violin(trim = FALSE, alpha = 0.5, show.legend = FALSE) +
    geom_boxplot(alpha=1, color="black", width=.4, fill="white") +
    # Diamond (shape 5) marks each group mean
    stat_summary(fun=mean, colour="black", geom="point",
                 shape=5, size=1, show.legend = FALSE) +
    theme_pubr() +
    labs(title=paste("Violin plot of Energy per", title),
         x=label,
         y="Energy (Joules)") +
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5)))
}
# Density plot of energy, split (fill colour) by `group`.
# Bug fix: previously plotted the global `data` regardless of the `df` argument.
plot_density_per_group = function (df, group, title, label) {
  return(ggplot(df, aes(x=energy, fill=.data[[group]])) +
    geom_density(alpha=0.4) +
    theme_pubr() +
    labs(title=paste("Density plot of Energy per", title),
         x="Energy (Joules)") +
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5)))
}
# Independent variables to explore, with display titles and axis labels.
groups = c("message_type", "app", "size")
titles = c("message type", "app", "size")
# "Size" capitalized for consistency with the other axis labels.
labels = c("Message type", "App", "Size")
# For each grouping variable: display and save a violin/box plot and a
# density plot of energy. Bug fix: the `df` argument is now passed through
# to the plotting helpers instead of the global `data`.
plots_per_group = function(df, groups, titles, labels) {
  assert("groups, titles and labels are of equal length", length(groups) == length(titles) && length(groups) == length(labels))
  # seq_along() is safe for empty input (1:length(x) would yield c(1, 0))
  for(i in seq_along(groups)) {
    print(plot_violin_box(df, groups[i], titles[i], labels[i]))
    ggsave(sprintf("%senergy/per_%s/energy_violin_box_plot_per_%s.png", exploratory_figures_path, groups[i], groups[i]))
    print(plot_density_per_group(df, groups[i], titles[i], labels[i]))
    ggsave(sprintf("%senergy/per_%s/energy_density_plot_per_%s.png", exploratory_figures_path, groups[i], groups[i]))
  }
}
# Violin/box and density plots of energy per message type, app and size
plots_per_group(data, groups, titles, labels)
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
We also investigate how energy is distributed for all 45 groups (combination of message type, app and size)
# Violin + box plot of energy for all 45 groups: message type on the x-axis,
# faceted by size (rows) and app (columns).
# Fixed deprecated arguments: show_guide -> show.legend, fun.y -> fun.
ggplot(data, aes(x=message_type, y=energy, group=message_type)) +
facet_grid(size ~ app) +
geom_violin(trim = FALSE, alpha = 0.5, show.legend = FALSE, aes(color=message_type)) +
geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
# Diamond (shape 5) marks each group mean
stat_summary(fun=mean, colour="black", geom="point",
shape=5, size=1, show.legend = FALSE) +
labs(title="Violin plot of Energy per group",
x="Message type",
y="Energy (Joules)") +
theme(legend.title=element_blank(),
plot.title = element_text(hjust = 0.5),
axis.text=element_text(size=8))
# Save the per-group energy violin/box plot shown above
ggsave(sprintf("%senergy/energy_violin_box_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image
Because duration correlated strongly with energy consumption we explore how it is different for each group too
# Violin + box plot of trial duration per message type, faceted by size and app.
# Fixed deprecated arguments: show_guide -> show.legend, fun.y -> fun.
ggplot(data, aes(x=message_type, y=time_sec, group=message_type)) +
facet_grid(size ~ app) +
geom_violin(trim = FALSE, alpha = 0.5, show.legend = FALSE, aes(color=message_type)) +
geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
# Diamond (shape 5) marks each group mean
stat_summary(fun=mean, colour="black", geom="point",
shape=5, size=1, show.legend = FALSE) +
labs(title="Violin plot of Duration per group",
x="Message type",
y="Trial duration (seconds)") +
theme(legend.title=element_blank(),
plot.title = element_text(hjust = 0.5),
axis.text=element_text(size=8))
# Save the per-group duration violin/box plot shown above
ggsave(sprintf("%sduration/duration_violin_box_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image
We find that for Messenger the duration differs greatly for the different message types.
# Duration densities per message type, restricted to Messenger trials.
ggplot(data[data$app == "Messenger", ], aes(x=time_sec, fill=message_type)) +
  geom_density(alpha=0.4) +
  theme_pubr() +
  labs(title="Density plot of Duration per message type on Messenger",
       x="Trial duration (seconds)") +
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5))
ggsave(sprintf("%sduration/duration_density_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image
Which we can also see (possibly even more clearly) in this plot
Now we dive into the different research questions and calculate descriptive statistics for them.
For the first research question we check how the energy differs per message type
# RQ1: mean energy per message type (n = 180 trials each).
summary_rq1_small = data %>%
group_by(message_type) %>%
summarise(count = n(),
mean = mean(energy),
sd = sd(energy))
summary_rq1_small
## # A tibble: 5 × 4
## message_type count mean sd
## <fct> <int> <dbl> <dbl>
## 1 text 180 110. 30.9
## 2 image 180 115. 36.3
## 3 video 180 102. 34.1
## 4 audio 180 107. 32.2
## 5 document 180 110. 34.5
and how that differs per size as well
# RQ1: mean energy per message type, further split by size (n = 60 each).
summary_rq1_large = data %>%
group_by(message_type, size) %>%
summarise(count = n(),
mean = mean(energy),
sd = sd(energy))
## `summarise()` has grouped output by 'message_type'. You can override using the
## `.groups` argument.
summary_rq1_large
## # A tibble: 15 × 5
## # Groups: message_type [5]
## message_type size count mean sd
## <fct> <fct> <int> <dbl> <dbl>
## 1 text small 60 112. 34.9
## 2 text medium 60 103. 29.3
## 3 text large 60 116. 27.0
## 4 image small 60 111. 43.1
## 5 image medium 60 116. 31.2
## 6 image large 60 119. 33.7
## 7 video small 60 96.0 37.0
## 8 video medium 60 103. 35.4
## 9 video large 60 109. 28.9
## 10 audio small 60 99.8 30.5
## 11 audio medium 60 105. 30.6
## 12 audio large 60 117. 33.4
## 13 document small 60 99.3 32.0
## 14 document medium 60 114. 29.3
## 15 document large 60 118. 39.2
For text messages, the medium size has a lower mean than the small and large sizes (this pattern does not hold for the other message types).
For the second research question we check how the energy differs per app
# RQ2: mean energy per app (n = 300 trials each).
summary_rq2_small = data %>%
group_by(app) %>%
summarise(count = n(),
mean = mean(energy),
sd = sd(energy))
summary_rq2_small
## # A tibble: 3 × 4
## app count mean sd
## <fct> <int> <dbl> <dbl>
## 1 Telegram 300 109. 33.8
## 2 WhatsApp 300 116. 31.4
## 3 Messenger 300 103. 35.1
and also how that differs for each message type
# RQ2: mean energy per app, further split by message type (n = 60 each).
summary_rq2_large = data %>%
group_by(app, message_type) %>%
summarise(count = n(),
mean = mean(energy),
sd = sd(energy))
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
summary_rq2_large
## # A tibble: 15 × 5
## # Groups: app [3]
## app message_type count mean sd
## <fct> <fct> <int> <dbl> <dbl>
## 1 Telegram text 60 109. 36.2
## 2 Telegram image 60 115. 34.6
## 3 Telegram video 60 107. 33.0
## 4 Telegram audio 60 104. 34.2
## 5 Telegram document 60 109. 31.1
## 6 WhatsApp text 60 103. 26.5
## 7 WhatsApp image 60 128. 34.8
## 8 WhatsApp video 60 119. 26.5
## 9 WhatsApp audio 60 115. 27.5
## 10 WhatsApp document 60 116. 36.2
## 11 Messenger text 60 119. 27.5
## 12 Messenger image 60 104. 36.2
## 13 Messenger video 60 80.7 30.9
## 14 Messenger audio 60 104. 33.9
## 15 Messenger document 60 107. 36.0
and also how that differs per size
# Mean energy for each of the 45 app x message_type x size groups (n = 20 each).
# NOTE(review): `summary` shadows base::summary() — consider a different name.
summary = data %>%
group_by(app, message_type, size) %>%
summarise(count = n(),
mean = mean(energy),
sd = sd(energy))
## `summarise()` has grouped output by 'app', 'message_type'. You can override
## using the `.groups` argument.
summary
## # A tibble: 45 × 6
## # Groups: app, message_type [15]
## app message_type size count mean sd
## <fct> <fct> <fct> <int> <dbl> <dbl>
## 1 Telegram text small 20 118. 51.5
## 2 Telegram text medium 20 99.3 25.9
## 3 Telegram text large 20 109. 23.8
## 4 Telegram image small 20 107. 41.7
## 5 Telegram image medium 20 118. 28.3
## 6 Telegram image large 20 119. 33.1
## 7 Telegram video small 20 107. 38.9
## 8 Telegram video medium 20 105. 35.0
## 9 Telegram video large 20 110. 25.1
## 10 Telegram audio small 20 93.2 22.1
## # ℹ 35 more rows
We plan to use an ANOVA with 3 independent variables (size, IM app and message type)
The ANOVA has several assumptions which we need to check
The first assumption is normality, which needs to be checked for the dependent variable and the residuals
We check the normality of the dependent variable (energy) for every
combination of groups in multiple ways:
(i) By testing for it statistically using the Shapiro-Wilk test
# Shapiro-Wilk test of energy within each of the 45 app/message_type/size
# groups, sorted ascending by W so the least-normal groups appear first.
# NOTE(review): shapiro.test(energy) is computed twice per group; storing the
# test result once (e.g. in a list column) would halve the work.
shapiro = data %>%
group_by(app, message_type, size) %>%
summarise(w.statistic = shapiro.test(energy)$statistic,
p.value = shapiro.test(energy)$p.value) %>%
arrange(w.statistic)
## `summarise()` has grouped output by 'app', 'message_type'. You can override
## using the `.groups` argument.
shapiro
## # A tibble: 45 × 5
## # Groups: app, message_type [15]
## app message_type size w.statistic p.value
## <fct> <fct> <fct> <dbl> <dbl>
## 1 Telegram document large 0.639 0.00000763
## 2 Messenger audio large 0.699 0.0000379
## 3 Telegram video small 0.738 0.000116
## 4 Telegram image large 0.741 0.000129
## 5 Telegram image small 0.743 0.000138
## 6 Telegram document small 0.754 0.000194
## 7 Telegram text large 0.759 0.000224
## 8 Messenger image large 0.772 0.000340
## 9 Telegram text small 0.810 0.00124
## 10 WhatsApp document large 0.819 0.00169
## # ℹ 35 more rows
If p<0.05 we reject the null hypothesis that the dependent
variable (energy) is normally distributed.
Otherwise we retain the assumption of normality.
As almost all p-values are significant, the data does not meet the
assumption of normality.
# Extract a single cell from a (possibly grouped) data frame as a plain
# value, addressed by row number and column name.
index_df = function(df, row, column_name) {
  cell = df[row, column_name]
  return(unlist(cell))
}
# Function to subset the Data dataset based on values in the Shapiro dataset.
# Bug fix: a misplaced closing parenthesis previously made
# `"size" & message_type == ...` the column-name argument of index_df(),
# so the size and message_type conditions were never applied correctly.
subset_data = function(row) {
  subset = subset(data, app==index_df(shapiro, row, "app") & size==index_df(shapiro, row, "size") & message_type==index_df(shapiro, row, "message_type"))
  return(subset)
}
# For every group (ordered by ascending Shapiro-Wilk W), draw and save a
# density plot and a QQ-plot of energy. Each plot is written to a PNG file
# first and then drawn again so it also appears in the rendered document.
for(i in 1:nrow(shapiro)) {
# Extract the right column values
app_val = index_df(shapiro, i, "app")
size_val = index_df(shapiro, i, "size")
message_type_val = index_df(shapiro, i, "message_type")
# Create a subset using these
subset = subset(data, app==app_val & size==size_val & message_type==message_type_val)
# Create a density plot
png(sprintf("%snormality/energy_per_group/energy_density_plot_for_%s_messages_on_%s_of_%s_size.png", exploratory_figures_path, message_type_val, tolower(app_val), size_val))
plot(density(subset$energy), main=sprintf("Density plot for %s messages on %s of %s size", message_type_val, app_val, size_val))
dev.off()
plot(density(subset$energy), main=sprintf("Density plot for %s messages on %s of %s size", message_type_val, app_val, size_val))
# Create a QQ-plot
png(sprintf("%snormality/energy_per_group/energy_qq_plot_for_%s_messages_on_%s_of_%s_size.png", exploratory_figures_path, message_type_val, app_val, size_val))
qqPlot(subset$energy, main=sprintf("QQ-plot for %s messages on %s of size %s", message_type_val, app_val, size_val), ylab="Energy (Joules)")
dev.off()
qqPlot(subset$energy, main=sprintf("QQ-plot for %s messages on %s of size %s", message_type_val, app_val, size_val), ylab="Energy (Joules)")
}
Also these indicate that the assumption of normality is not met.
The residuals also need to be normally distributed, which we also assess by:
# Fit the full-factorial ANOVA so its residuals can be checked for normality.
res.aov = aov(energy ~ message_type * app * size, data = data)
# NOTE(review): `residuals` shadows stats::residuals() — consider renaming.
residuals = residuals(res.aov)
shapiro.test(residuals)
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.95939, p-value = 4.252e-15
If p<0.05 we reject the null hypothesis that the residuals are
normally distributed.
Otherwise we retain the assumption of normality.
Given the p-value, we have to reject the null hypothesis that the
residuals are normally distributed.
# Create a density plot
# Write the residual density plot to file; it is drawn again further below
# so it also appears in the rendered document.
png(sprintf("%snormality/energy_residuals_density_plot.png", exploratory_figures_path))
plot(density(residuals), main="Density plot of the Residuals")
dev.off()
## png
## 2
plot(density(residuals), main="Density plot of the Residuals")
# Create a QQ-plot
# Save the residual QQ-plot under the exploratory figures directory
png(sprintf("%snormality/energy_residuals_qq_plot.png", exploratory_figures_path))
# qqPlot (car) also prints the indices of the most extreme points (608, 609)
qqPlot(residuals, ylab="Residuals", main="QQ-plot of the Residuals")
## [1] 608 609
dev.off()
## png
## 2
# Re-draw the same plot so it also appears inline in the rendered report
qqPlot(residuals, ylab="Residuals", main="QQ-plot of the Residuals")
## [1] 608 609
We check whether the variances of each combination of groups are roughly equal
# Levene's test for homogeneity of variance across all
# message_type x app x size groups (p < 0.05 => unequal variances)
leveneTest(energy ~ message_type * app * size, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group  44  0.7262 0.9079
##       855
If p<0.05 we reject the null hypothesis that all combinations of
groups have equal variance.
Otherwise we retain the assumption of equal variances. Although
homoscedasticity was not violated, normality was violated for almost all
groups as well as for the residuals. Therefore, we chose to use an
ART ANOVA instead.
We will measure the effect size of the significant effects using partial eta squared
# Aligned Rank Transform of the data, allowing a nonparametric factorial
# ANOVA despite the normality violations established above
art_transformed = art(energy ~ message_type * app * size, data = data)
# Sanity check of the transform: aligned column sums and the F values of
# the "not of interest" ANOVAs should all be ~0
summary(art_transformed)
## Aligned Rank Transform of Factorial Model
##
## Call:
## art(formula = energy ~ message_type * app * size, data = data)
##
## Column sums of aligned responses (should all be ~0):
##      message_type               app              size
##                 0                 0                 0
##  message_type:app message_type:size          app:size
##                 0                 0                 0
## message_type:app:size
##                 0
##
## F values of ANOVAs on aligned responses not of interest (should all be ~0):
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##       0       0       0       0       0       0
res.art = anova(art_transformed)
# Include partial eta squared as effect size measure
# (partial eta^2 = SS_effect / (SS_effect + SS_residual))
res.art$eta.sq.part = with(res.art, `Sum Sq`/(`Sum Sq` + `Sum Sq.res`))
res.art
## Analysis of Variance of Aligned Rank Transformed Data
##
## Table Type: Anova Table (Type III tests)
## Model: No Repeated Measures (lm)
## Response: art(energy)
##
## Df Df.res F value Pr(>F) eta.sq.part
## 1 message_type 4 855 4.5240 0.0012747 0.020726 **
## 2 app 2 855 26.1792 9.2272e-12 0.057704 ***
## 3 size 2 855 14.8384 4.6249e-07 0.033545 ***
## 4 message_type:app 8 855 10.2722 8.7730e-14 0.087686 ***
## 5 message_type:size 8 855 1.4449 0.1738362 0.013339
## 6 app:size 4 855 3.7666 0.0048004 0.017316 **
## 7 message_type:app:size 16 855 2.4670 0.0011235 0.044129 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We will measure the effect size of the significant effects between levels using Cliff’s Delta
# Comparing differences per message_type (RQ1)
# We leave out size as message_type:size is non-significant
comparisons_message_type = summary(art.con(art_transformed, "message_type", adjust="BH"))
## NOTE: Results may be misleading due to involvement in interactions
# comparisons_message_type
# Cliff's Delta for every pair of message types. combn() enumerates the
# pairs (1,2), (1,3), ... in factor level order, which is exactly the
# order of the art.con contrast rows above.
c_delta_message_types = combn(seq_len(n_message_types), 2, function(pair) {
  energy_first = data[data$message_type==message_types[pair[1]],]$energy
  energy_second = data[data$message_type==message_types[pair[2]],]$energy
  cliff.delta(energy_first, energy_second)$estimate
})
# Attach the effect sizes to the contrast table
comparisons_message_type$c.delta = c_delta_message_types
print(comparisons_message_type)
## contrast estimate SE df t.ratio p.value c.delta
## text - image -36.95 27.5 855 -1.344 0.2987 -0.0633
## text - video 75.80 27.5 855 2.757 0.0198 0.1319
## text - audio 17.07 27.5 855 0.621 0.5943 0.0586
## text - document -2.47 27.5 855 -0.090 0.9284 -0.0155
## image - video 112.75 27.5 855 4.102 0.0004 0.1946
## image - audio 54.02 27.5 855 1.965 0.0995 0.1251
## image - document 34.48 27.5 855 1.254 0.3001 0.0715
## video - audio -58.73 27.5 855 -2.137 0.0823 -0.0755
## video - document -78.27 27.5 855 -2.847 0.0198 -0.1283
## audio - document -19.54 27.5 855 -0.711 0.5943 -0.0643
##
## Results are averaged over the levels of: app, size
## P value adjustment: BH method for 10 tests
Only the text - video, image - video, and video - document contrasts are significant.
For the main research questions, we will first compare the differences between the apps only
# Comparing differences between apps (RQ2)
comparisons_app = summary(art.con(art_transformed, "app", adjust="BH"))
## NOTE: Results may be misleading due to involvement in interactions
# comparisons_app
c_delta_apps = c()
# For all combinations of apps (pairs follow the factor level order of
# `apps`, which matches the art.con contrast row order above)
for(i in 1:(n_apps-1)) {
for(j in (i+1):n_apps) {
app_i = apps[i]
app_j = apps[j]
# Calculate Cliff's Delta
c_delta = cliff.delta(data[data$app==app_i,]$energy, data[data$app==app_j,]$energy)
# Save Cliff's Delta
c_delta_apps = c(c_delta_apps, c_delta$estimate)
}
}
comparisons_app$c.delta = c_delta_apps
print(comparisons_app)
## contrast estimate SE df t.ratio p.value c.delta
## Telegram - WhatsApp -108.8 21 855 -5.183 <.0001 -0.2363
## Telegram - Messenger 37.4 21 855 1.781 0.0753 0.0758
## WhatsApp - Messenger 146.2 21 855 6.964 <.0001 0.2645
##
## Results are averaged over the levels of: message_type, size
## P value adjustment: BH method for 3 tests
WhatsApp is significantly worse than the others, there is no significant difference between Telegram and Messenger
Then we (as the interaction between app and size is significant) will also look at how the size plays a role
# Comparing differences between apps per size (RQ2)
comparisons_app_size = summary(art.con(art_transformed, "app:size", adjust="none"))
## NOTE: Results may be misleading due to involvement in interactions
# Select columns of interest
cols_comparisons_app_size = which(str_detect(comparisons_app_size$contrast , ".*small.*small.*|.*medium.*medium.*|.*large.*large.*"))
comparisons_app_size = comparisons_app_size[cols_comparisons_app_size, ]
# Correct p-values
comparisons_app_size$p.value = p.adjust(comparisons_app_size$p.value, method = "BH")
c_delta_app_size = c()
# For all combinations of apps per size.
# BUG FIX: the retained contrast rows are sorted lexicographically
# (first app, then size, then second app), e.g. Messenger,large-Telegram,large;
# Messenger,large-WhatsApp,large; Messenger,medium-Telegram,medium; ...
# The original code iterated size in the OUTERMOST loop, which misaligned
# the Cliff's Delta values with the contrast rows (visible as sign
# mismatches in the medium-size rows of the printed table). The size loop
# must be nested inside the first-app loop.
for(i in 1:(n_apps-1)) {
  app_i = sort(apps)[i]
  for(size in sort(sizes)) {
    for(j in (i+1):n_apps) {
      app_j = sort(apps)[j]
      # Calculate Cliff's Delta
      c_delta = cliff.delta(data[data$size==size & data$app==app_i,]$energy, data[data$size==size & data$app==app_j,]$energy)
      # Save Cliff's Delta
      c_delta_app_size = c(c_delta_app_size, c_delta$estimate)
    }
  }
}
# print(c_delta_message_types)
comparisons_app_size$c.delta = c_delta_app_size
print(comparisons_app_size)
## contrast estimate SE df t.ratio p.value c.delta
## Messenger,large - Telegram,large -38.5 35.6 855 -1.082 0.3147 -0.0614
## Messenger,large - WhatsApp,large -89.5 35.6 855 -2.515 0.0181 -0.1804
## Messenger,medium - Telegram,medium 30.4 35.6 855 0.855 0.3926 -0.1508
## Messenger,medium - WhatsApp,medium -112.4 35.6 855 -3.158 0.0037 0.0438
## Messenger,small - Telegram,small -105.1 35.6 855 -2.953 0.0058 -0.2226
## Messenger,small - WhatsApp,small -234.7 35.6 855 -6.594 <.0001 -0.3250
## Telegram,large - WhatsApp,large -51.0 35.6 855 -1.433 0.1957 -0.2154
## Telegram,medium - WhatsApp,medium -142.9 35.6 855 -4.013 0.0003 -0.3860
## Telegram,small - WhatsApp,small -129.6 35.6 855 -3.640 0.0009 -0.2354
##
## Results are averaged over the levels of: message_type
For the subquestions we will also take into account the message type
First we will disregard the size and see if we can find differences between apps per message type
# Comparing differences between apps per message_type (RQ2)
comparisons_message_type_app = summary(art.con(art_transformed, "message_type:app", adjust="none"))
## NOTE: Results may be misleading due to involvement in interactions
# Select columns of interest
cols_comparisons_message_type_app = which(str_detect(comparisons_message_type_app$contrast , "^text.*text.*|^image.*image.*|^audio.*audio.*|^video.*video.*|^document.*document.*"))
comparisons_message_type_app = comparisons_message_type_app[cols_comparisons_message_type_app, ]
# Correct p-values
comparisons_message_type_app$p.value = p.adjust(comparisons_message_type_app$p.value, method = "BH")
# comparisons_message_type_app
c_delta_message_types_apps = c()
# For all combinations of apps per message type. The contrast rows sort by
# message type first and app pair second, so iterating sorted message types
# in the outer loop keeps the deltas aligned with the contrast rows.
for(mt in sort(message_types)) {
for(i in 1:(n_apps-1)) {
for(j in (i+1):n_apps) {
app_i = sort(apps)[i]
app_j = sort(apps)[j]
# Calculate Cliff's Delta
c_delta = cliff.delta(data[data$message_type==mt & data$app==app_i,]$energy, data[data$message_type==mt & data$app==app_j,]$energy)
# Save Cliff's Delta
c_delta_message_types_apps = c(c_delta_message_types_apps, c_delta$estimate)
}
}
}
# print(c_delta_message_types)
comparisons_message_type_app$c.delta = c_delta_message_types_apps
print(comparisons_message_type_app)
## contrast estimate SE df t.ratio p.value
## audio,Messenger - audio,Telegram -38.2 45.1 855 -0.847 0.4584
## audio,Messenger - audio,WhatsApp -142.2 45.1 855 -3.150 0.0045
## audio,Telegram - audio,WhatsApp -104.0 45.1 855 -2.303 0.0359
## document,Messenger - document,Telegram 28.2 45.1 855 0.624 0.5707
## document,Messenger - document,WhatsApp -73.3 45.1 855 -1.623 0.1312
## document,Telegram - document,WhatsApp -101.5 45.1 855 -2.247 0.0373
## image,Messenger - image,Telegram -76.5 45.1 855 -1.694 0.1235
## image,Messenger - image,WhatsApp -227.6 45.1 855 -5.042 <.0001
## image,Telegram - image,WhatsApp -151.1 45.1 855 -3.347 0.0032
## text,Messenger - text,Telegram 131.5 45.1 855 2.913 0.0069
## text,Messenger - text,WhatsApp 140.9 45.1 855 3.122 0.0045
## text,Telegram - text,WhatsApp 9.4 45.1 855 0.208 0.8351
## video,Messenger - video,Telegram -211.9 45.1 855 -4.694 <.0001
## video,Messenger - video,WhatsApp -351.2 45.1 855 -7.780 <.0001
## video,Telegram - video,WhatsApp -139.3 45.1 855 -3.086 0.0045
## c.delta
## -0.0772
## -0.3100
## -0.3161
## 0.0139
## -0.1400
## -0.2611
## -0.1811
## -0.4267
## -0.2744
## 0.3783
## 0.3606
## -0.0461
## -0.4739
## -0.6656
## -0.3061
##
## Results are averaged over the levels of: size
plot_violin_box_message_type = function(df, mt) {
  # Violin + box plot of energy per app for one message type `mt`.
  # `df` must contain (at least) the columns app, energy and message_type.
  # NOTE(review): replaced the deprecated ggplot2 arguments
  # show_guide -> show.legend and fun.y -> fun (requires ggplot2 >= 3.3).
  return(ggplot(df[df$message_type==mt,], aes(x=app, y=energy, group=app)) +
    geom_violin(trim = FALSE, alpha = 0.5, show.legend = FALSE, aes(color=app)) +
    geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
    # Diamond marker (shape 5) for the group mean
    stat_summary(fun=mean, colour="black", geom="point",
                 shape=5, size=1, show.legend = FALSE) +
    # theme_pubr() #+
    labs(title=sprintf("Violin plot of Energy of %s messages per app", mt),
         # x="",
         y="Energy (Joules)") +
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5)))
}
plot_density_message_type = function(df, mt) {
  # Density plot of energy per app for one message type `mt`.
  # `df` must contain (at least) the columns app, energy and message_type.
  rows_for_type = df[df$message_type==mt,]
  density_plot = ggplot(rows_for_type, aes(x=energy, fill=app)) +
    geom_density(alpha=0.4) +
    theme_pubr() +
    labs(title=sprintf("Density plot of Energy of %s messages per app", mt),
         x="Energy (Joules)") +
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5))
  return(density_plot)
}
plot_message_type = function(df, mts) {
  # For each message type in `mts`, draw and save the violin/box plot and
  # the density plot of energy per app. ggsave() writes the most recently
  # displayed plot, so each plot is printed immediately before saving.
  for(current_type in mts) {
    violin_box = plot_violin_box_message_type(df, current_type)
    print(violin_box)
    ggsave(sprintf("%senergy/per_app/message_type/energy_violin_box_plot_for_%s_messages_per_app.png", exploratory_figures_path, current_type))
    densities = plot_density_message_type(df, current_type)
    print(densities)
    ggsave(sprintf("%senergy/per_app/message_type/energy_density_plot_for_%s_messages_per_app.png", exploratory_figures_path, current_type))
  }
}
# Generate and save both plots for every message type (alphabetical order)
plot_message_type(data, c("audio", "document", "image", "text", "video"))
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
For documents we can only say that Telegram is significantly more energy
efficient than WhatsApp.
For audio we can say that both Messenger and Telegram are significantly
more energy efficient than WhatsApp (p = 0.0045 and p = 0.0359).
For images we can only say that Telegram and Messenger are significantly
more energy efficient than WhatsApp.
For text we can say that Messenger is significantly more efficient than
WhatsApp and Telegram, but we cannot differentiate between WhatsApp and
Telegram in terms of energy consumption.
For video we can say that Messenger is significantly more energy
efficient than Telegram, which is significantly more energy efficient
than WhatsApp (and Messenger is thus also significantly more energy
efficient than WhatsApp).
Then we will also include size to see if we still can make claims at the most detailed level
# Comparing differences between apps per message_type and size (RQ2)
comparisons_message_type_app_size = summary(art.con(art_transformed, "message_type:app:size", adjust="none"))
# Select columns of interest
cols_comparisons_message_type_app_size = which(str_detect(comparisons_message_type_app_size$contrast , "(?=^text.*text.*|^image.*image.*|^audio.*audio.*|^video.*video.*|^document.*document.*)(?=.*small.*small$|.*medium.*medium$|.*large.*large$)"))
comparisons_message_type_app_size = comparisons_message_type_app_size[cols_comparisons_message_type_app_size, ]
# Correct p-values
comparisons_message_type_app_size$p.value = p.adjust(comparisons_message_type_app_size$p.value, method= "BH")
# comparisons_message_type_app_size
c_delta_message_types_apps_sizes = c()
# For all combinations of apps per message type and size.
# BUG FIX: the retained contrast rows are sorted lexicographically
# (message type, then first app, then size, then second app), e.g.
# audio,Messenger,large-audio,Telegram,large; audio,Messenger,large-
# audio,WhatsApp,large; audio,Messenger,medium-audio,Telegram,medium; ...
# The original code iterated size in the OUTERMOST loop, which misaligned
# the Cliff's Delta values with the contrast rows. The loops must be
# nested message_type > first app > size > second app.
for(mt in sort(message_types)) {
  for(i in 1:(n_apps-1)) {
    app_i = sort(apps)[i]
    for(size in sort(sizes)) {
      for(j in (i+1):n_apps) {
        app_j = sort(apps)[j]
        # Calculate Cliff's Delta
        c_delta = cliff.delta(data[data$size==size & data$message_type==mt & data$app==app_i,]$energy, data[data$size==size & data$message_type==mt & data$app==app_j,]$energy)
        # Save Cliff's Delta
        c_delta_message_types_apps_sizes = c(c_delta_message_types_apps_sizes, c_delta$estimate)
      }
    }
  }
}
# print(c_delta_message_types)
comparisons_message_type_app_size$c.delta = c_delta_message_types_apps_sizes
print(comparisons_message_type_app_size)
## contrast estimate SE df t.ratio
## audio,Messenger,large - audio,Telegram,large -222.80 75.1 855 -2.965
## audio,Messenger,large - audio,WhatsApp,large -199.90 75.1 855 -2.660
## audio,Messenger,medium - audio,Telegram,medium 127.25 75.1 855 1.694
## audio,Messenger,medium - audio,WhatsApp,medium -144.90 75.1 855 -1.928
## audio,Messenger,small - audio,Telegram,small 36.95 75.1 855 0.492
## audio,Messenger,small - audio,WhatsApp,small -105.45 75.1 855 -1.403
## audio,Telegram,large - audio,WhatsApp,large 22.90 75.1 855 0.305
## audio,Telegram,medium - audio,WhatsApp,medium -272.15 75.1 855 -3.622
## audio,Telegram,small - audio,WhatsApp,small -142.40 75.1 855 -1.895
## document,Messenger,large - document,Telegram,large -5.00 75.1 855 -0.067
## document,Messenger,large - document,WhatsApp,large -44.75 75.1 855 -0.596
## document,Messenger,medium - document,Telegram,medium 222.90 75.1 855 2.967
## document,Messenger,medium - document,WhatsApp,medium 60.55 75.1 855 0.806
## document,Messenger,small - document,Telegram,small -192.75 75.1 855 -2.565
## document,Messenger,small - document,WhatsApp,small -259.30 75.1 855 -3.451
## document,Telegram,large - document,WhatsApp,large -39.75 75.1 855 -0.529
## document,Telegram,medium - document,WhatsApp,medium -162.35 75.1 855 -2.161
## document,Telegram,small - document,WhatsApp,small -66.55 75.1 855 -0.886
## image,Messenger,large - image,Telegram,large 4.95 75.1 855 0.066
## image,Messenger,large - image,WhatsApp,large -64.30 75.1 855 -0.856
## image,Messenger,medium - image,Telegram,medium -135.25 75.1 855 -1.800
## image,Messenger,medium - image,WhatsApp,medium -215.55 75.1 855 -2.869
## image,Messenger,small - image,Telegram,small -89.05 75.1 855 -1.185
## image,Messenger,small - image,WhatsApp,small -358.65 75.1 855 -4.773
## image,Telegram,large - image,WhatsApp,large -69.25 75.1 855 -0.922
## image,Telegram,medium - image,WhatsApp,medium -80.30 75.1 855 -1.069
## image,Telegram,small - image,WhatsApp,small -269.60 75.1 855 -3.588
## text,Messenger,large - text,Telegram,large 121.70 75.1 855 1.620
## text,Messenger,large - text,WhatsApp,large 64.30 75.1 855 0.856
## text,Messenger,medium - text,Telegram,medium 153.30 75.1 855 2.040
## text,Messenger,medium - text,WhatsApp,medium 152.25 75.1 855 2.026
## text,Messenger,small - text,Telegram,small 149.70 75.1 855 1.992
## text,Messenger,small - text,WhatsApp,small 212.20 75.1 855 2.824
## text,Telegram,large - text,WhatsApp,large -57.40 75.1 855 -0.764
## text,Telegram,medium - text,WhatsApp,medium -1.05 75.1 855 -0.014
## text,Telegram,small - text,WhatsApp,small 62.50 75.1 855 0.832
## video,Messenger,large - video,Telegram,large -93.70 75.1 855 -1.247
## video,Messenger,large - video,WhatsApp,large -175.25 75.1 855 -2.332
## video,Messenger,medium - video,Telegram,medium -228.95 75.1 855 -3.047
## video,Messenger,medium - video,WhatsApp,medium -390.55 75.1 855 -5.198
## video,Messenger,small - video,Telegram,small -253.75 75.1 855 -3.377
## video,Messenger,small - video,WhatsApp,small -423.00 75.1 855 -5.630
## video,Telegram,large - video,WhatsApp,large -81.55 75.1 855 -1.085
## video,Telegram,medium - video,WhatsApp,medium -161.60 75.1 855 -2.151
## video,Telegram,small - video,WhatsApp,small -169.25 75.1 855 -2.253
## p.value c.delta
## 0.0140 -0.520
## 0.0275 -0.490
## 0.1633 0.080
## 0.1107 0.140
## 0.6838 0.020
## 0.2681 -0.260
## 0.8149 0.120
## 0.0032 -0.270
## 0.1143 -0.220
## 0.9690 0.360
## 0.6365 0.200
## 0.0140 -0.200
## 0.5115 -0.215
## 0.0337 -0.370
## 0.0044 -0.185
## 0.6715 0.165
## 0.0794 -0.280
## 0.5045 -0.635
## 0.9690 0.590
## 0.5045 0.050
## 0.1354 -0.400
## 0.0173 -0.405
## 0.3667 -0.400
## <.0001 -0.095
## 0.5020 0.465
## 0.4144 0.390
## 0.0032 -0.080
## 0.1829 -0.525
## 0.5045 -0.680
## 0.0969 -0.345
## 0.0969 0.065
## 0.1000 -0.185
## 0.0182 -0.335
## 0.5271 -0.605
## 0.9889 -0.565
## 0.5072 -0.140
## 0.3419 -0.225
## 0.0597 -0.580
## 0.0134 -0.475
## <.0001 0.380
## 0.0049 0.565
## <.0001 -0.005
## 0.4144 -0.745
## 0.0794 -0.885
## 0.0690 -0.385